In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import plotly.express as px
In [2]:
try:
    df=pd.read_excel("financial_loan.xlsx")
except FileNotFoundError as e:
    print(e)

Understanding & Data Cleaning¶

In [3]:
df
Out[3]:
id address_state application_type emp_length emp_title grade home_ownership issue_date last_credit_pull_date last_payment_date ... sub_grade term verification_status annual_income dti installment int_rate loan_amount total_acc total_payment
0 1077430 GA INDIVIDUAL < 1 year Ryder C RENT 2021-02-11 2021-09-13 2021-04-13 ... C4 60 months Source Verified 30000.0 0.0100 59.83 0.1527 2500 4 1009
1 1072053 CA INDIVIDUAL 9 years MKC Accounting E RENT 2021-01-01 2021-12-14 2021-01-15 ... E1 36 months Source Verified 48000.0 0.0535 109.43 0.1864 3000 4 3939
2 1069243 CA INDIVIDUAL 4 years Chemat Technology Inc C RENT 2021-01-05 2021-12-12 2021-01-09 ... C5 36 months Not Verified 50000.0 0.2088 421.65 0.1596 12000 11 3522
3 1041756 TX INDIVIDUAL < 1 year barnes distribution B MORTGAGE 2021-02-25 2021-12-12 2021-03-12 ... B2 60 months Source Verified 42000.0 0.0540 97.06 0.1065 4500 9 4911
4 1068350 IL INDIVIDUAL 10+ years J&J Steel Inc A MORTGAGE 2021-01-01 2021-12-14 2021-01-15 ... A1 36 months Verified 83000.0 0.0231 106.53 0.0603 3500 28 3835
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
38571 803452 NJ INDIVIDUAL < 1 year Joseph M Sanzari Company C MORTGAGE 2021-07-11 2021-05-16 2021-05-16 ... C1 60 months Verified 100000.0 0.1986 551.64 0.1299 24250 33 31946
38572 970377 NY INDIVIDUAL 8 years Swat Fame C RENT 2021-10-11 2021-04-16 2021-05-16 ... C1 60 months Verified 50000.0 0.0458 579.72 0.1349 25200 18 31870
38573 875376 CA INDIVIDUAL 5 years Anaheim Regional Medical Center D RENT 2021-09-11 2021-05-16 2021-05-16 ... D5 60 months Verified 65000.0 0.1734 627.93 0.1749 25000 20 35721
38574 972997 NY INDIVIDUAL 5 years Brooklyn Radiology D RENT 2021-10-11 2021-05-16 2021-05-16 ... D5 60 months Verified 368000.0 0.0009 612.72 0.1825 24000 9 33677
38575 682952 NY INDIVIDUAL 4 years Allen Edmonds F RENT 2021-07-11 2021-05-16 2021-05-16 ... F3 60 months Verified 80000.0 0.0600 486.86 0.2099 18000 7 27679

38576 rows × 24 columns

In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38576 entries, 0 to 38575
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   id                     38576 non-null  int64         
 1   address_state          38576 non-null  object        
 2   application_type       38576 non-null  object        
 3   emp_length             38576 non-null  object        
 4   emp_title              37138 non-null  object        
 5   grade                  38576 non-null  object        
 6   home_ownership         38576 non-null  object        
 7   issue_date             38576 non-null  datetime64[ns]
 8   last_credit_pull_date  38576 non-null  datetime64[ns]
 9   last_payment_date      38576 non-null  datetime64[ns]
 10  loan_status            38576 non-null  object        
 11  next_payment_date      38576 non-null  datetime64[ns]
 12  member_id              38576 non-null  int64         
 13  purpose                38576 non-null  object        
 14  sub_grade              38576 non-null  object        
 15  term                   38576 non-null  object        
 16  verification_status    38576 non-null  object        
 17  annual_income          38576 non-null  float64       
 18  dti                    38576 non-null  float64       
 19  installment            38576 non-null  float64       
 20  int_rate               38576 non-null  float64       
 21  loan_amount            38576 non-null  int64         
 22  total_acc              38576 non-null  int64         
 23  total_payment          38576 non-null  int64         
dtypes: datetime64[ns](4), float64(4), int64(5), object(11)
memory usage: 7.1+ MB
In [5]:
df[df.duplicated()]
Out[5]:
id address_state application_type emp_length emp_title grade home_ownership issue_date last_credit_pull_date last_payment_date ... sub_grade term verification_status annual_income dti installment int_rate loan_amount total_acc total_payment

0 rows × 24 columns

In [6]:
df.isnull().sum()
Out[6]:
id                          0
address_state               0
application_type            0
emp_length                  0
emp_title                1438
grade                       0
home_ownership              0
issue_date                  0
last_credit_pull_date       0
last_payment_date           0
loan_status                 0
next_payment_date           0
member_id                   0
purpose                     0
sub_grade                   0
term                        0
verification_status         0
annual_income               0
dti                         0
installment                 0
int_rate                    0
loan_amount                 0
total_acc                   0
total_payment               0
dtype: int64
In [7]:
catcols=df.select_dtypes(include=['object','category']).columns.tolist()
for i in catcols:
    print(df[i].value_counts())
    print('--------------------------------------------')
address_state
CA    6894
NY    3701
FL    2773
TX    2664
NJ    1822
IL    1486
PA    1482
VA    1375
GA    1355
MA    1310
OH    1188
MD    1027
AZ     833
WA     805
CO     770
NC     759
CT     730
MI     685
MO     660
MN     592
NV     482
SC     464
WI     446
OR     436
AL     432
LA     426
KY     320
OK     293
KS     260
UT     252
AR     236
DC     214
RI     196
NM     183
HI     170
WV     167
NH     161
DE     110
MT      79
WY      79
AK      78
SD      63
VT      54
MS      19
TN      17
IN       9
ID       6
NE       5
IA       5
ME       3
Name: count, dtype: int64
--------------------------------------------
application_type
INDIVIDUAL    38576
Name: count, dtype: int64
--------------------------------------------
emp_length
10+ years    8870
< 1 year     4575
2 years      4382
3 years      4088
4 years      3428
5 years      3273
1 year       3229
6 years      2228
7 years      1772
8 years      1476
9 years      1255
Name: count, dtype: int64
--------------------------------------------
emp_title
US Army                             135
Bank of America                     109
IBM                                  67
AT&T                                 63
Wells Fargo                          57
                                   ... 
CH Newton Builders, Inc               1
NGS Associates, Inc.                  1
Ridgecrest Capital Partners           1
rice &werthmann                       1
Five County Assoc of Governments      1
Name: count, Length: 28525, dtype: int64
--------------------------------------------
grade
B    11674
A     9689
C     7904
D     5182
E     2786
F     1028
G      313
Name: count, dtype: int64
--------------------------------------------
home_ownership
RENT        18439
MORTGAGE    17198
OWN          2838
OTHER          98
NONE            3
Name: count, dtype: int64
--------------------------------------------
loan_status
Fully Paid     32145
Charged Off     5333
Current         1098
Name: count, dtype: int64
--------------------------------------------
purpose
Debt consolidation    18214
credit card            4998
other                  3824
home improvement       2876
major purchase         2110
small business         1776
car                    1497
wedding                 928
medical                 667
moving                  559
house                   366
vacation                352
educational             315
renewable_energy         94
Name: count, dtype: int64
--------------------------------------------
sub_grade
B3    2834
A4    2803
A5    2654
B5    2644
B4    2455
C1    2089
B2    1990
C2    1972
B1    1751
A3    1740
C3    1490
A2    1440
D2    1314
C4    1202
C5    1151
D3    1144
A1    1052
D4     960
D1     913
D5     851
E1     750
E2     640
E3     538
E4     448
E5     410
F1     325
F2     243
F3     182
F4     163
F5     115
G1     101
G2      78
G4      56
G3      48
G5      30
Name: count, dtype: int64
--------------------------------------------
term
36 months    28237
60 months    10339
Name: count, dtype: int64
--------------------------------------------
verification_status
Not Verified       16464
Verified           12335
Source Verified     9777
Name: count, dtype: int64
--------------------------------------------

Exploratory Data Analysis (EDA)¶

Q: Total Loan Applications¶

In [8]:
print('Total Loan Applications:',df['id'].count())
Total Loan Applications: 38576

Q: MTD (Month-to-Date) Total Loan Applications¶

In [9]:
df['IDYM']=df['issue_date'].dt.strftime('%Y-%m')
lma=df[df['issue_date'].dt.to_period('M')==df['IDYM'].max()].shape[0]
print(f'Latest month Applications ({(df['IDYM'].max())}): {lma}')
Latest month Applications (2021-12): 4314

Q: MOM (Month-on-Month) Loan Applications¶

In [10]:
mom=df['IDYM'].value_counts().reset_index(name='TA').sort_values(by='IDYM')

plt.figure(figsize=(15,5))
plt.fill_between(mom['IDYM'],mom['TA'],color='skyblue',alpha=0.5)
plt.plot(mom['IDYM'],mom['TA'],marker='.',color='blue')

for x,y in zip(mom['IDYM'],mom['TA']):
    plt.text(x,y+50,str(y),ha='center',va='bottom')

plt.title('No. of Loan Applications per Month')
plt.xlabel('Year-Months')
plt.ylabel('Loan Applications')
plt.ylim(2000,4500)
plt.tight_layout()
plt.grid(linestyle='--',alpha=0.6)
plt.show()
No description has been provided for this image

Q: Total Funded Amount¶

In [11]:
print(f'Total Funded Amount: ${(df['loan_amount'].sum()/1000000).round(2)} M')
Total Funded Amount: $435.76 M

Q: MOM Total Funded Amount¶

In [12]:
mfi=df.groupby('IDYM')['loan_amount'].sum().reset_index(name='TLA').sort_values(by='IDYM')
mfi
Out[12]:
IDYM TLA
0 2021-01 25031650
1 2021-02 24647825
2 2021-03 28875700
3 2021-04 29800800
4 2021-05 31738350
5 2021-06 34161475
6 2021-07 35813900
7 2021-08 38149600
8 2021-09 40907725
9 2021-10 44893800
10 2021-11 47754825
11 2021-12 53981425
In [13]:
plt.figure(figsize=(15,5))
mfi['TLAM']=round(mfi['TLA']/1000000,2)
plt.fill_between(mfi['IDYM'],mfi['TLAM'],color='tomato',alpha=0.3)
plt.plot(mfi['IDYM'],mfi['TLAM'],marker='.',color='orangered')

for x,y in zip(mfi['IDYM'],mfi['TLAM']):
    plt.text(x,y+1,'$'+str(y)+'M',ha='center',va='bottom')

plt.title('Total Amount Funded per Month')
plt.xlabel('Year-Months')
plt.ylabel('Amount in Millions')
plt.ylim(20,60)
plt.grid(linestyle='--',alpha=0.6)
plt.show()
No description has been provided for this image

Q: Total Amount Received¶

In [14]:
print(f'Total Amount Received: ${round(df['total_payment'].sum()/1000000,2)}M')
Total Amount Received: $473.07M

Q: Average Interest Rate¶

In [15]:
print(f'Average Interest Rate: {(df['int_rate'].mean()*100).round(2)}%')
Average Interest Rate: 12.05%
In [16]:
#### Q: Average DTI (Debt-to-Income) Ratio:
print(f'Average DTI Ratio: {(df['dti'].mean()*100).round(2)}%')
Average DTI Ratio: 13.33%

Q: Good Loan Matrix¶

In [17]:
gl=df[df['loan_status'].isin(['Fully Paid','Current'])]
print(f'No. of GoodLoans: {gl['id'].count()}')
print(f'Funded Amt for GoodLoans: ${round(gl['loan_amount'].sum()/1000000,2)} M')
print(f'GoodLoans Amt Recieved: ${round(gl['total_payment'].sum()/1000000,2)} M')
print(f'GoodLoans Applications: {round(gl['id'].count()/df['id'].count()*100)}%')
No. of GoodLoans: 33243
Funded Amt for GoodLoans: $370.22 M
GoodLoans Amt Recieved: $435.79 M
GoodLoans Applications: 86%

Q: Bad Loan Matrix¶

In [18]:
bl=df[df['loan_status']=='Charged Off']
print(f'No. of BadLoans: {bl['id'].count()}')
print(f'Funded Amt for BadLoans: ${round(bl['loan_amount'].sum()/1000000,2)} M')
print(f'Loss Due to BadLoans: ${round(bl['total_payment'].sum()/1000000,2)} M')
print(f'Percentage of Loss: {round(bl['total_payment'].sum()/df['loan_amount'].sum()*100,2)}%')
print(f'BadLoans Applications: {round(bl['id'].count()/df['id'].count()*100)}%')
No. of BadLoans: 5333
Funded Amt for BadLoans: $65.53 M
Loss Due to BadLoans: $37.28 M
Percentage of Loss: 8.56%
BadLoans Applications: 14%

Q: Regional Analysis by State for Total Loan Applications¶

In [19]:
rta=df.groupby('address_state')['id'].count().reset_index(name='TLA').sort_values(by='TLA',ascending=False)

plt.figure(figsize=(15,10))
bar=plt.barh(rta['address_state'],rta['TLA'],color='skyblue',height=0.6)
plt.gca().invert_yaxis()
plt.title('Total Loan Applications per State')
plt.ylabel('State',fontsize=12)
plt.xlabel('Number of Loan Applications',fontsize=12)

plt.bar_label(bar,
              labels=[f"{v:,}" for v in rta['TLA']],
              fontsize=10,
              padding=5)

plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Regional Analysis by State for Total Fund Allocation¶

In [20]:
rfa=df.groupby('address_state')['loan_amount'].sum().reset_index(name='TFA').sort_values(by='TFA',ascending=False)

plt.figure(figsize=(15,10))
bar=plt.barh(rfa['address_state'],rfa['TFA']//1000000,color='lightcoral',height=0.6)
plt.gca().invert_yaxis()
plt.title('Total Fund Allocations per State ($)')
plt.ylabel('State',fontsize=12)
plt.xlabel('Amount in Millions',fontsize=12)

plt.bar_label(bar,
              labels=[f"{v}K" for v in (rfa['TFA']//1000)],
              fontsize=10,
              padding=5)

plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Regional Analysis by State for Total Amount Received¶

In [21]:
rar=df.groupby('address_state')['total_payment'].sum().reset_index(name='TAR').sort_values(by='TAR',ascending=False)

plt.figure(figsize=(15,10))
bar=plt.barh(rar['address_state'],rar['TAR']//1000000,color='green',height=0.6,alpha=0.7)
plt.gca().invert_yaxis()
plt.title('Total Amount Received per State ($)')
plt.ylabel('State',fontsize=12)
plt.xlabel('Amount in Millions',fontsize=12)

plt.bar_label(bar,
              labels=[f"{v}K" for v in (rar['TAR']//1000)],
              fontsize=10,
              padding=5)

plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Total Loan Application by Term¶

In [22]:
lat=df.groupby('term')['id'].count()
plt.figure(figsize=(5,5))
plt.pie(lat,labels=lat.index,autopct=lambda p: f'{p:.0f}%\n({int(p*lat.sum()/100)})',colors=['darkcyan','mediumturquoise'],startangle=90)
plt.title('Number of Loan Applications by Tenure')
plt.show()
No description has been provided for this image

Q: Total Funded Amount by Term¶

In [23]:
fat=df.groupby('term')['loan_amount'].sum()
plt.figure(figsize=(5,5))
plt.pie(fat,labels=fat.index,autopct=lambda p: f'{p:.0f}%\n(${p*fat.sum()/100/1e6:.1f}M)',colors=['orangered','coral'],startangle=90)
plt.title('Total Fund Allocated by Tenure')
plt.show()
No description has been provided for this image

Q: Total Amount Received by Term¶

In [24]:
art=df.groupby('term')['total_payment'].sum()
plt.figure(figsize=(5,5))
plt.pie(art,labels=fat.index,autopct=lambda p: f'{p:.0f}%\n(${p*fat.sum()/100/1e6:.1f}M)',colors=['olivedrab','greenyellow'],startangle=90)
plt.title('Total Amount Received by Tenure')
plt.show()
No description has been provided for this image
In [25]:
df['cov']=np.where(df['loan_status']=='Charged Off',1,0)

clt=df.groupby('term')['cov'].sum()

plt.figure(figsize=(5,5))
plt.pie(clt,labels=clt.index,startangle=90,colors=['darkred','tomato'],autopct=lambda p: f'{p:.0f}%\n({int(p*clt.sum()/100)})')
plt.title('Number of Loans Defaulted by Tenure')
plt.show()
No description has been provided for this image

Q: Total Loan Applications by Employee Len¶

In [26]:
ela=df.groupby('emp_length')['id'].count().reset_index(name='TA').sort_values(by='TA',ascending=True)

plt.figure(figsize=(15,5))
bar=plt.barh(ela['emp_length'],ela['TA'],color=plt.cm.Blues(ela['TA']/ela['TA'].max()))
plt.title("Number of Loan Applications by Client's Work Experience")
plt.xlabel("Number of Applications")
plt.ylabel("Work Experience")
plt.bar_label(bar,padding=5)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Loan Defaults as per Employee Len¶

In [27]:
cee=df.groupby('emp_length').agg(TLA=('id','count'),NLD=('cov','sum')).reset_index()
cee['Percent']=round(cee['NLD']/cee['TLA']*100)
cee = cee.sort_values('Percent', ascending=True)


plt.figure(figsize=(20,6))
bars = plt.barh(
    cee['emp_length'],
    cee['Percent'],
    color=plt.cm.Reds(cee['Percent'] / cee['Percent'].max())
)

plt.title("Number of Loans Defaulted &  Percentage by Client's Work Experience")
plt.xlabel("Default Rate (%)")
plt.ylabel("Work Experience")
plt.xlim(0,20)

for bar, nld, pct in zip(bars, cee['NLD'], cee['Percent']):
    plt.text(
        bar.get_width()+2.5,
        bar.get_y() + bar.get_height() / 2,
        f"Defaulted: {nld} | {pct:.0f}%",
        va='center',
        ha='right',
        fontsize=10,
    )

plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.subplots_adjust(left=0.3, right=0.95)
plt.show()
No description has been provided for this image

Q: Total Loan Applications by purpose¶

In [28]:
tap=df.groupby('purpose').agg(TotalLoans=('id','count')).reset_index().sort_values(by='TotalLoans',ascending=True)

plt.figure(figsize=(15,5))
bar=plt.barh(tap['purpose'],tap['TotalLoans'],color=plt.cm.viridis(tap['TotalLoans']/tap['TotalLoans'].max()))
plt.title("Number of Loan Applications by Purpose")
plt.xlabel("Number of Loan Applications")
plt.ylabel("Purposes")
plt.bar_label(bar,padding=5)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Total Amount Funded by purpose¶

In [29]:
taf=df.groupby('purpose').agg(FundedAmt=('loan_amount','sum')).reset_index().sort_values(by='FundedAmt',ascending=True)

plt.figure(figsize=(15,5))
bar=plt.barh(taf['purpose'],taf['FundedAmt']//1000,color=plt.cm.Wistia(taf['FundedAmt']/taf['FundedAmt'].max()))
plt.title("Total Amount Funded by Purpose ($)")
plt.xlabel(" Amount Funded")
plt.ylabel("Purposes")
plt.bar_label(bar,labels=[f'{v}k' for v in taf['FundedAmt']//1000],padding=5)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Loans Defaults by Purpose¶

In [30]:
ldp=df.groupby('purpose').agg(TLA=('id','count'),NLD=('cov','sum')).reset_index()
ldp['Percent']=round(ldp['NLD']/ldp['TLA']*100)
ldp = ldp.sort_values('Percent', ascending=True)

plt.figure(figsize=(15,5))
colors=plt.cm.autumn(np.linspace(1,0,ldp.shape[0]))
bar=plt.barh(ldp['purpose'],ldp['Percent'],color=colors)
plt.title("Number of Loan Defaults & Percentage by Purpose of Loan Required")
plt.xlabel("Number of Loan Defaults")
plt.ylabel("Purposes")
plt.xlim(0,35)

for bar,x,y in zip(bar,ldp['NLD'],ldp['Percent']):
    plt.text(
        bar.get_width()+4.5,
        bar.get_y()+bar.get_height()/2,
        f'Defaulted: {x} | {y:.0f}%',
        va='center',
        ha='right',
        fontsize=10,
    )
    
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Loan Applications by Home Ownership¶

In [31]:
aho=df.groupby('home_ownership')['id'].count().reset_index(name='TA')

plt.figure(figsize=(15,8))
fig=px.treemap(
    aho,
    path=['home_ownership'],
    color='TA',
    values='TA',
    color_continuous_scale='Blues',
    title='Number of Loan Applications by Home Ownership'
)
fig.update_traces(
    textinfo='label+value',
    textfont_size=12
)

fig.show()
<Figure size 1500x800 with 0 Axes>

Q: Total Amount Funded by Home Ownership¶

In [32]:
aho=df.groupby('home_ownership')['loan_amount'].sum().reset_index(name='TAF')

plt.figure(figsize=(15,8))
fig=px.treemap(
    aho,
    path=['home_ownership'],
    color='TAF',
    values='TAF',
    color_continuous_scale='Oranges',
    title='Total Amount Funded by Home Ownership'
)
fig.update_traces(
    textinfo='label+value',
    textfont_size=12
)

fig.show()
<Figure size 1500x800 with 0 Axes>

Q: No. of Loan Defaults by Home Ownership¶

In [33]:
ldho=df.groupby('home_ownership').agg(TLA=('id','count'),NLD=('cov','sum')).reset_index()
ldho['Percent']=round(ldho['NLD']/ldho['TLA']*100)
ldho=ldho.query("Percent>0")

plt.figure(figsize=(15,8))
fig=px.treemap(
    ldho,
    path=['home_ownership'],
    color='Percent',
    values='Percent',
    custom_data=['TLA', 'NLD', 'Percent'],
    color_continuous_scale='Reds',
    title='Loans Defaulted by Home Ownership'
)
fig.update_traces(
    texttemplate=
            "%{label}<br><br>"
            "Total Applications: %{customdata[0]}<br>"
            "Loans Defaulted: %{customdata[1]}<br>"
            "Percentage: %{customdata[2]}%<br>",
    textfont_size=12
)

fig.show()
<Figure size 1500x800 with 0 Axes>

Q: No. of Loans Default due to sources¶

In [34]:
lds=df.groupby('verification_status')['cov'].sum().reset_index(name='NLD').sort_values(by='NLD',ascending=True)

plt.figure(figsize=(12,3))
bar=plt.barh(lds['verification_status'],lds['NLD'],height=0.4,color='purple')
plt.title("Number of Loans Defaulted by Sources")
plt.xlabel("Number of Loans Defaulted")
plt.ylabel("Verification Status")
plt.bar_label(bar,padding=5)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Overview of Loan Applications¶

In [35]:
lts = df.groupby('loan_status')['id'].count()

plt.figure(figsize=(5,5))

colors = ['#2E7D32', '#C62828', '#F9A825']
explode = [0.08,0,0]

plt.pie(
    lts,
    labels=lts.index,
    autopct=lambda p: f'{p:.0f}%\n({int(p*lts.sum()/100):,})',
    colors=colors,
    explode=explode,
    startangle=90,
    counterclock=False,
    pctdistance=0.80,       
    labeldistance=1.05,
    wedgeprops=dict(width=0.4, edgecolor='white')
)

plt.text(0, 0, "LOANS\nSTATUS", ha='center', va='center',
         fontsize=14, fontweight='bold')

plt.title("Overview of Bank's Loans", fontsize=13, fontweight='bold')

plt.tight_layout()
plt.show()
No description has been provided for this image

Q: State wise Loan Defaults.¶

In [36]:
sld=df.groupby('address_state',as_index=False).agg(TC=('id','count'),TLD=('cov','sum'))
sld['Percent%']=round(sld['TLD']/sld['TC']*100)
In [37]:
sld_sorted = sld.sort_values(by='Percent%', ascending=False)

colors = plt.cm.Reds(
    sld_sorted['Percent%'] / sld_sorted['Percent%'].max()
)

plt.figure(figsize=(12,10))

bars = plt.barh(
    sld_sorted['address_state'],
    sld_sorted['Percent%'],
    color=colors,
    height=0.6
)

plt.gca().invert_yaxis()
plt.title('Volume & Rate of Defaulted Loans per State', fontsize=12, fontweight='bold')
plt.xlabel('Defaulted Rate (%)', fontsize=12)
plt.ylabel('State', fontsize=12)
plt.xlim(0,100)

for bar, d, p in zip(bars, sld_sorted['TLD'], sld_sorted['Percent%']):
    plt.text(
        bar.get_width()+1,
        bar.get_y() + bar.get_height()/2,
        f'Defaults: {d} | {p:.0f}%',
        va='center',
        fontsize=9
    )

plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Loan Defaults by total_acc¶

In [47]:
tald=df.groupby('total_acc')['cov'].sum().reset_index(name='TLD')
tald['Ratio']=round(tald['TLD']/df['cov'].sum()*100,2)
tald.query("Ratio>1")
Out[47]:
total_acc TLD Ratio
2 4 72 1.35
3 5 84 1.58
4 6 98 1.84
5 7 123 2.31
6 8 162 3.04
7 9 152 2.85
8 10 170 3.19
9 11 184 3.45
10 12 186 3.49
11 13 181 3.39
12 14 217 4.07
13 15 187 3.51
14 16 216 4.05
15 17 194 3.64
16 18 174 3.26
17 19 189 3.54
18 20 179 3.36
19 21 169 3.17
20 22 149 2.79
21 23 160 3.00
22 24 175 3.28
23 25 140 2.63
24 26 113 2.12
25 27 131 2.46
26 28 135 2.53
27 29 115 2.16
28 30 125 2.34
29 31 99 1.86
30 32 82 1.54
31 33 107 2.01
32 34 71 1.33
33 35 82 1.54
34 36 82 1.54
36 38 67 1.26
37 39 62 1.16
In [49]:
tald['total_acc'].corr(tald['Ratio'])
Out[49]:
np.float64(-0.8117562018798052)
In [50]:
# There is no relation between total no. of accounts and loan defaults.

Q: Loan Applications by Income category¶

In [87]:
def ic(val):
    if val<=50000:
        return 'Lower Class'
    elif val>50000 and val<=100000:
        return 'Lower Middle Class'
    elif val>100000 and val<=500000:
        return 'Middle Class'
    elif val>500000 and val<=2500000:
        return 'Upper Middle Class'
    else:
        return 'Super Class'
df['IncCat']=df['annual_income'].apply(ic)

taic=df.groupby('IncCat')['id'].count().reset_index(name='TLA').sort_values(by='TLA',ascending=True)

plt.figure(figsize=(15,5))
bar=plt.barh(taic['IncCat'],taic['TLA'],color=plt.cm.Blues(taic['TLA']/taic['TLA'].max()))
plt.title("Number of Loan Applications by Income Category")
plt.xlabel("Number of Applications")
plt.ylabel("Income Category")
plt.xlim(0,25000)
plt.bar_label(bar,padding=5)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
No description has been provided for this image

Q: Loan defaulted by Income Category¶

In [117]:
ldic=df.groupby('IncCat')['cov'].sum().astype(int).reset_index(name='TLD').sort_values(by='TLD')
ldic=pd.merge(ldic,taic,on='IncCat',how='left')
ldic['DefaultRate']=(ldic['TLD']/ldic['TLA']*100).astype(int)
ldic=ldic.sort_values(by='DefaultRate')

plt.figure(figsize=(20,6))
bars = plt.barh(
    ldic['IncCat'],
    ldic['DefaultRate'],
    color=plt.cm.Reds(ldic['DefaultRate'] / ldic['DefaultRate'].max())
)

plt.title("Number of Loans Defaulted &  Percentage by Income Category")
plt.xlabel("Default Rate (%)")
plt.ylabel("Income Category")
plt.xlim(0,20)

for bar, nld, pct in zip(bars, ldic['TLD'], ldic['DefaultRate']):
    plt.text(
        bar.get_width()+2.5,
        bar.get_y() + bar.get_height() / 2,
        f"Defaulted: {nld} | {pct:.0f}%",
        va='center',
        ha='right',
        fontsize=10,
    )

plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.subplots_adjust(left=0.3, right=0.95)
plt.show()
No description has been provided for this image